# shapes_params = {
#     "betas": (1000,),
#     "alphas_cumprod": (1000,),
#     "alphas_cumprod_prev": (1000,),
#     "sqrt_alphas_cumprod": (1000,),
#     "sqrt_one_minus_alphas_cumprod": (1000,),
#     "log_one_minus_alphas_cumprod": (1000,),
#     "sqrt_recip_alphas_cumprod": (1000,),
#     "sqrt_recipm1_alphas_cumprod": (1000,),
#     "posterior_variance": (1000,),
#     "posterior_log_variance_clipped": (1000,),
#     "posterior_mean_coef1": (1000,),
#     "posterior_mean_coef2": (1000,),
#     "model_ema.decay": (),
#     "model_ema.num_updates": (),
# }

# Lookup table from parameter name to its shape, expressed as a tuple of
# integer dimensions. Built from (name, shape) pairs so that each entry reads
# as a single record; insertion order matches the order of the pairs below.
# NOTE(review): presumably these are the non-UNet tensors of the checkpoint
# (the UNet weights have their own table) — confirm against the loader.
shapes_params = dict(
    [
        ("temb_coefficients_fp32", (160,)),
        ("causal_mask", (1, 1, 77, 77)),
        ("aux_output_conv.weight", (12,)),
        ("aux_output_conv.bias", (3,)),
        ("alphas_cumprod", (1000,)),
        ("temb_coefficients_fp16", (160,)),
    ]
)


shapes_unet = {
    "model.diffusion_model.time_embed.0.weight": (1280, 320),
    "model.diffusion_model.time_embed.0.bias": (1280,),
    "model.diffusion_model.time_embed.2.weight": (1280, 1280),
    "model.diffusion_model.time_embed.2.bias": (1280,),
    "model.diffusion_model.input_blocks.0.0.weight": (320, 4, 3, 3),
    "model.diffusion_model.input_blocks.0.0.bias": (320,),
    "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": (320,),
    "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": (320,),
    "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": (320, 320, 3, 3),
    "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": (320,),
    "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": (320, 1280),
    "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": (320,),
    "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": (320,),
    "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": (320,),
    "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": (320, 320, 3, 3),
    "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": (320,),
    "model.diffusion_model.input_blocks.1.1.norm.weight": (320,),
    "model.diffusion_model.input_blocks.1.1.norm.bias": (320,),
    "model.diffusion_model.input_blocks.1.1.proj_in.weight": (320, 320, 1, 1),
    "model.diffusion_model.input_blocks.1.1.proj_in.bias": (320,),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": (
        2560,
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": (
        2560,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": (
        320,
        1280,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": (320,),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": (
        320,
        768,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": (
        320,
        768,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": (320,),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": (320,),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": (320,),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": (320,),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": (320,),
    "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": (320,),
    "model.diffusion_model.input_blocks.1.1.proj_out.weight": (320, 320, 1, 1),
    "model.diffusion_model.input_blocks.1.1.proj_out.bias": (320,),
    "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": (320,),
    "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": (320,),
    "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": (320, 320, 3, 3),
    "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": (320,),
    "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": (320, 1280),
    "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": (320,),
    "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": (320,),
    "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": (320,),
    "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": (320, 320, 3, 3),
    "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": (320,),
    "model.diffusion_model.input_blocks.2.1.norm.weight": (320,),
    "model.diffusion_model.input_blocks.2.1.norm.bias": (320,),
    "model.diffusion_model.input_blocks.2.1.proj_in.weight": (320, 320, 1, 1),
    "model.diffusion_model.input_blocks.2.1.proj_in.bias": (320,),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": (
        2560,
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": (
        2560,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": (
        320,
        1280,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": (320,),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": (
        320,
        768,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": (
        320,
        768,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": (320,),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": (320,),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": (320,),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": (320,),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": (320,),
    "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": (320,),
    "model.diffusion_model.input_blocks.2.1.proj_out.weight": (320, 320, 1, 1),
    "model.diffusion_model.input_blocks.2.1.proj_out.bias": (320,),
    "model.diffusion_model.input_blocks.3.0.op.weight": (320, 320, 3, 3),
    "model.diffusion_model.input_blocks.3.0.op.bias": (320,),
    "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": (320,),
    "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": (320,),
    "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": (640, 320, 3, 3),
    "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": (640,),
    "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": (640, 1280),
    "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": (640,),
    "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": (640,),
    "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": (640,),
    "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": (640, 640, 3, 3),
    "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": (640,),
    "model.diffusion_model.input_blocks.4.0.skip_connection.weight": (640, 320, 1, 1),
    "model.diffusion_model.input_blocks.4.0.skip_connection.bias": (640,),
    "model.diffusion_model.input_blocks.4.1.norm.weight": (640,),
    "model.diffusion_model.input_blocks.4.1.norm.bias": (640,),
    "model.diffusion_model.input_blocks.4.1.proj_in.weight": (640, 640, 1, 1),
    "model.diffusion_model.input_blocks.4.1.proj_in.bias": (640,),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": (
        5120,
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": (
        5120,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": (
        640,
        2560,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": (640,),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": (
        640,
        768,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": (
        640,
        768,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": (640,),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": (640,),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": (640,),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": (640,),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": (640,),
    "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": (640,),
    "model.diffusion_model.input_blocks.4.1.proj_out.weight": (640, 640, 1, 1),
    "model.diffusion_model.input_blocks.4.1.proj_out.bias": (640,),
    "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": (640,),
    "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": (640,),
    "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": (640, 640, 3, 3),
    "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": (640,),
    "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": (640, 1280),
    "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": (640,),
    "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": (640,),
    "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": (640,),
    "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": (640, 640, 3, 3),
    "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": (640,),
    "model.diffusion_model.input_blocks.5.1.norm.weight": (640,),
    "model.diffusion_model.input_blocks.5.1.norm.bias": (640,),
    "model.diffusion_model.input_blocks.5.1.proj_in.weight": (640, 640, 1, 1),
    "model.diffusion_model.input_blocks.5.1.proj_in.bias": (640,),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": (
        5120,
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": (
        5120,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": (
        640,
        2560,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": (640,),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": (
        640,
        768,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": (
        640,
        768,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": (640,),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": (640,),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": (640,),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": (640,),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": (640,),
    "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": (640,),
    "model.diffusion_model.input_blocks.5.1.proj_out.weight": (640, 640, 1, 1),
    "model.diffusion_model.input_blocks.5.1.proj_out.bias": (640,),
    "model.diffusion_model.input_blocks.6.0.op.weight": (640, 640, 3, 3),
    "model.diffusion_model.input_blocks.6.0.op.bias": (640,),
    "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": (640,),
    "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": (640,),
    "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": (1280, 640, 3, 3),
    "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.input_blocks.7.0.skip_connection.weight": (1280, 640, 1, 1),
    "model.diffusion_model.input_blocks.7.0.skip_connection.bias": (1280,),
    "model.diffusion_model.input_blocks.7.1.norm.weight": (1280,),
    "model.diffusion_model.input_blocks.7.1.norm.bias": (1280,),
    "model.diffusion_model.input_blocks.7.1.proj_in.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.input_blocks.7.1.proj_in.bias": (1280,),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": (
        10240,
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": (
        10240,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": (
        1280,
        5120,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": (
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": (1280,),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": (1280,),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": (1280,),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": (1280,),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": (1280,),
    "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": (1280,),
    "model.diffusion_model.input_blocks.7.1.proj_out.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.input_blocks.7.1.proj_out.bias": (1280,),
    "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": (1280,),
    "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": (1280,),
    "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.input_blocks.8.1.norm.weight": (1280,),
    "model.diffusion_model.input_blocks.8.1.norm.bias": (1280,),
    "model.diffusion_model.input_blocks.8.1.proj_in.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.input_blocks.8.1.proj_in.bias": (1280,),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": (
        10240,
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": (
        10240,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": (
        1280,
        5120,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": (
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": (1280,),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": (1280,),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": (1280,),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": (1280,),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": (1280,),
    "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": (1280,),
    "model.diffusion_model.input_blocks.8.1.proj_out.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.input_blocks.8.1.proj_out.bias": (1280,),
    "model.diffusion_model.input_blocks.9.0.op.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.9.0.op.bias": (1280,),
    "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": (1280,),
    "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": (1280,),
    "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": (1280,),
    "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": (1280,),
    "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.middle_block.0.in_layers.0.weight": (1280,),
    "model.diffusion_model.middle_block.0.in_layers.0.bias": (1280,),
    "model.diffusion_model.middle_block.0.in_layers.2.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.middle_block.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.middle_block.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.middle_block.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.middle_block.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.middle_block.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.middle_block.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.middle_block.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.middle_block.1.norm.weight": (1280,),
    "model.diffusion_model.middle_block.1.norm.bias": (1280,),
    "model.diffusion_model.middle_block.1.proj_in.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.middle_block.1.proj_in.bias": (1280,),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": (
        10240,
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": (
        10240,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": (
        1280,
        5120,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": (1280,),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": (1280,),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": (1280,),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": (1280,),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": (1280,),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": (1280,),
    "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": (1280,),
    "model.diffusion_model.middle_block.1.proj_out.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.middle_block.1.proj_out.bias": (1280,),
    "model.diffusion_model.middle_block.2.in_layers.0.weight": (1280,),
    "model.diffusion_model.middle_block.2.in_layers.0.bias": (1280,),
    "model.diffusion_model.middle_block.2.in_layers.2.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.middle_block.2.in_layers.2.bias": (1280,),
    "model.diffusion_model.middle_block.2.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.middle_block.2.emb_layers.1.bias": (1280,),
    "model.diffusion_model.middle_block.2.out_layers.0.weight": (1280,),
    "model.diffusion_model.middle_block.2.out_layers.0.bias": (1280,),
    "model.diffusion_model.middle_block.2.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.middle_block.2.out_layers.3.bias": (1280,),
    "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": (2560,),
    "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": (2560,),
    "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": (1280, 2560, 3, 3),
    "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.output_blocks.0.0.skip_connection.weight": (
        1280,
        2560,
        1,
        1,
    ),
    "model.diffusion_model.output_blocks.0.0.skip_connection.bias": (1280,),
    "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": (2560,),
    "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": (2560,),
    "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": (1280, 2560, 3, 3),
    "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.output_blocks.1.0.skip_connection.weight": (
        1280,
        2560,
        1,
        1,
    ),
    "model.diffusion_model.output_blocks.1.0.skip_connection.bias": (1280,),
    "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": (2560,),
    "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": (2560,),
    "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": (1280, 2560, 3, 3),
    "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.output_blocks.2.0.skip_connection.weight": (
        1280,
        2560,
        1,
        1,
    ),
    "model.diffusion_model.output_blocks.2.0.skip_connection.bias": (1280,),
    "model.diffusion_model.output_blocks.2.1.conv.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.2.1.conv.bias": (1280,),
    "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": (2560,),
    "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": (2560,),
    "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": (1280, 2560, 3, 3),
    "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.output_blocks.3.0.skip_connection.weight": (
        1280,
        2560,
        1,
        1,
    ),
    "model.diffusion_model.output_blocks.3.0.skip_connection.bias": (1280,),
    "model.diffusion_model.output_blocks.3.1.norm.weight": (1280,),
    "model.diffusion_model.output_blocks.3.1.norm.bias": (1280,),
    "model.diffusion_model.output_blocks.3.1.proj_in.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.output_blocks.3.1.proj_in.bias": (1280,),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": (
        10240,
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": (
        10240,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": (
        1280,
        5120,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": (1280,),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": (1280,),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": (1280,),
    "model.diffusion_model.output_blocks.3.1.proj_out.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.output_blocks.3.1.proj_out.bias": (1280,),
    "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": (2560,),
    "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": (2560,),
    "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": (1280, 2560, 3, 3),
    "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.output_blocks.4.0.skip_connection.weight": (
        1280,
        2560,
        1,
        1,
    ),
    "model.diffusion_model.output_blocks.4.0.skip_connection.bias": (1280,),
    "model.diffusion_model.output_blocks.4.1.norm.weight": (1280,),
    "model.diffusion_model.output_blocks.4.1.norm.bias": (1280,),
    "model.diffusion_model.output_blocks.4.1.proj_in.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.output_blocks.4.1.proj_in.bias": (1280,),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": (
        10240,
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": (
        10240,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": (
        1280,
        5120,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": (1280,),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": (1280,),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": (1280,),
    "model.diffusion_model.output_blocks.4.1.proj_out.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.output_blocks.4.1.proj_out.bias": (1280,),
    "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": (1920,),
    "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": (1920,),
    "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": (1280, 1920, 3, 3),
    "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": (1280,),
    "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": (1280, 1280),
    "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": (1280,),
    "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": (1280,),
    "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": (1280,),
    "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": (1280,),
    "model.diffusion_model.output_blocks.5.0.skip_connection.weight": (
        1280,
        1920,
        1,
        1,
    ),
    "model.diffusion_model.output_blocks.5.0.skip_connection.bias": (1280,),
    "model.diffusion_model.output_blocks.5.1.norm.weight": (1280,),
    "model.diffusion_model.output_blocks.5.1.norm.bias": (1280,),
    "model.diffusion_model.output_blocks.5.1.proj_in.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.output_blocks.5.1.proj_in.bias": (1280,),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": (
        10240,
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": (
        10240,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": (
        1280,
        5120,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": (
        1280,
        768,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": (
        1280,
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": (
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": (1280,),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": (1280,),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": (
        1280,
    ),
    "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": (1280,),
    "model.diffusion_model.output_blocks.5.1.proj_out.weight": (1280, 1280, 1, 1),
    "model.diffusion_model.output_blocks.5.1.proj_out.bias": (1280,),
    "model.diffusion_model.output_blocks.5.2.conv.weight": (1280, 1280, 3, 3),
    "model.diffusion_model.output_blocks.5.2.conv.bias": (1280,),
    "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": (1920,),
    "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": (1920,),
    "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": (640, 1920, 3, 3),
    "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": (640,),
    "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": (640, 1280),
    "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": (640,),
    "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": (640,),
    "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": (640,),
    "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": (640, 640, 3, 3),
    "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": (640,),
    "model.diffusion_model.output_blocks.6.0.skip_connection.weight": (640, 1920, 1, 1),
    "model.diffusion_model.output_blocks.6.0.skip_connection.bias": (640,),
    "model.diffusion_model.output_blocks.6.1.norm.weight": (640,),
    "model.diffusion_model.output_blocks.6.1.norm.bias": (640,),
    "model.diffusion_model.output_blocks.6.1.proj_in.weight": (640, 640, 1, 1),
    "model.diffusion_model.output_blocks.6.1.proj_in.bias": (640,),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": (
        5120,
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": (
        5120,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": (
        640,
        2560,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": (
        640,
        768,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": (
        640,
        768,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": (640,),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": (640,),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": (640,),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": (640,),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": (640,),
    "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": (640,),
    "model.diffusion_model.output_blocks.6.1.proj_out.weight": (640, 640, 1, 1),
    "model.diffusion_model.output_blocks.6.1.proj_out.bias": (640,),
    "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": (1280,),
    "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": (1280,),
    "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": (640, 1280, 3, 3),
    "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": (640,),
    "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": (640, 1280),
    "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": (640,),
    "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": (640,),
    "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": (640,),
    "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": (640, 640, 3, 3),
    "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": (640,),
    "model.diffusion_model.output_blocks.7.0.skip_connection.weight": (640, 1280, 1, 1),
    "model.diffusion_model.output_blocks.7.0.skip_connection.bias": (640,),
    "model.diffusion_model.output_blocks.7.1.norm.weight": (640,),
    "model.diffusion_model.output_blocks.7.1.norm.bias": (640,),
    "model.diffusion_model.output_blocks.7.1.proj_in.weight": (640, 640, 1, 1),
    "model.diffusion_model.output_blocks.7.1.proj_in.bias": (640,),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": (
        5120,
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": (
        5120,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": (
        640,
        2560,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": (
        640,
        768,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": (
        640,
        768,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": (640,),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": (640,),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": (640,),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": (640,),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": (640,),
    "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": (640,),
    "model.diffusion_model.output_blocks.7.1.proj_out.weight": (640, 640, 1, 1),
    "model.diffusion_model.output_blocks.7.1.proj_out.bias": (640,),
    "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": (960,),
    "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": (960,),
    "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": (640, 960, 3, 3),
    "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": (640,),
    "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": (640, 1280),
    "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": (640,),
    "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": (640,),
    "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": (640,),
    "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": (640, 640, 3, 3),
    "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": (640,),
    "model.diffusion_model.output_blocks.8.0.skip_connection.weight": (640, 960, 1, 1),
    "model.diffusion_model.output_blocks.8.0.skip_connection.bias": (640,),
    "model.diffusion_model.output_blocks.8.1.norm.weight": (640,),
    "model.diffusion_model.output_blocks.8.1.norm.bias": (640,),
    "model.diffusion_model.output_blocks.8.1.proj_in.weight": (640, 640, 1, 1),
    "model.diffusion_model.output_blocks.8.1.proj_in.bias": (640,),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": (
        5120,
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": (
        5120,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": (
        640,
        2560,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": (
        640,
        768,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": (
        640,
        768,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": (
        640,
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": (
        640,
    ),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": (640,),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": (640,),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": (640,),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": (640,),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": (640,),
    "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": (640,),
    "model.diffusion_model.output_blocks.8.1.proj_out.weight": (640, 640, 1, 1),
    "model.diffusion_model.output_blocks.8.1.proj_out.bias": (640,),
    "model.diffusion_model.output_blocks.8.2.conv.weight": (640, 640, 3, 3),
    "model.diffusion_model.output_blocks.8.2.conv.bias": (640,),
    "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": (960,),
    "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": (960,),
    "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": (320, 960, 3, 3),
    "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": (320,),
    "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": (320, 1280),
    "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": (320,),
    "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": (320,),
    "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": (320,),
    "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": (320, 320, 3, 3),
    "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": (320,),
    "model.diffusion_model.output_blocks.9.0.skip_connection.weight": (320, 960, 1, 1),
    "model.diffusion_model.output_blocks.9.0.skip_connection.bias": (320,),
    "model.diffusion_model.output_blocks.9.1.norm.weight": (320,),
    "model.diffusion_model.output_blocks.9.1.norm.bias": (320,),
    "model.diffusion_model.output_blocks.9.1.proj_in.weight": (320, 320, 1, 1),
    "model.diffusion_model.output_blocks.9.1.proj_in.bias": (320,),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": (
        2560,
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": (
        2560,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": (
        320,
        1280,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": (
        320,
        768,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": (
        320,
        768,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": (320,),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": (320,),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": (320,),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": (320,),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": (320,),
    "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": (320,),
    "model.diffusion_model.output_blocks.9.1.proj_out.weight": (320, 320, 1, 1),
    "model.diffusion_model.output_blocks.9.1.proj_out.bias": (320,),
    "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": (640,),
    "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": (640,),
    "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": (320, 640, 3, 3),
    "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": (320,),
    "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": (320, 1280),
    "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": (320,),
    "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": (320,),
    "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": (320,),
    "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": (320, 320, 3, 3),
    "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": (320,),
    "model.diffusion_model.output_blocks.10.0.skip_connection.weight": (320, 640, 1, 1),
    "model.diffusion_model.output_blocks.10.0.skip_connection.bias": (320,),
    "model.diffusion_model.output_blocks.10.1.norm.weight": (320,),
    "model.diffusion_model.output_blocks.10.1.norm.bias": (320,),
    "model.diffusion_model.output_blocks.10.1.proj_in.weight": (320, 320, 1, 1),
    "model.diffusion_model.output_blocks.10.1.proj_in.bias": (320,),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": (
        2560,
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": (
        2560,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": (
        320,
        1280,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": (
        320,
        768,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": (
        320,
        768,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": (
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": (320,),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": (
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": (320,),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": (
        320,
    ),
    "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": (320,),
    "model.diffusion_model.output_blocks.10.1.proj_out.weight": (320, 320, 1, 1),
    "model.diffusion_model.output_blocks.10.1.proj_out.bias": (320,),
    "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": (640,),
    "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": (640,),
    "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": (320, 640, 3, 3),
    "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": (320,),
    "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": (320, 1280),
    "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": (320,),
    "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": (320,),
    "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": (320,),
    "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": (320, 320, 3, 3),
    "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": (320,),
    "model.diffusion_model.output_blocks.11.0.skip_connection.weight": (320, 640, 1, 1),
    "model.diffusion_model.output_blocks.11.0.skip_connection.bias": (320,),
    "model.diffusion_model.output_blocks.11.1.norm.weight": (320,),
    "model.diffusion_model.output_blocks.11.1.norm.bias": (320,),
    "model.diffusion_model.output_blocks.11.1.proj_in.weight": (320, 320, 1, 1),
    "model.diffusion_model.output_blocks.11.1.proj_in.bias": (320,),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": (
        2560,
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": (
        2560,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": (
        320,
        1280,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": (
        320,
        768,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": (
        320,
        768,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": (
        320,
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": (
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": (
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": (320,),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": (
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": (320,),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": (
        320,
    ),
    "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": (320,),
    "model.diffusion_model.output_blocks.11.1.proj_out.weight": (320, 320, 1, 1),
    "model.diffusion_model.output_blocks.11.1.proj_out.bias": (320,),
    "model.diffusion_model.out.0.weight": (320,),
    "model.diffusion_model.out.0.bias": (320,),
    "model.diffusion_model.out.2.weight": (4, 320, 3, 3),
    "model.diffusion_model.out.2.bias": (4,),
}


def _build_shapes_encoder():
    """Assemble the name -> shape table for every ``first_stage_model.encoder.*`` entry.

    The table is generated from the repeating key patterns instead of being
    spelled out entry by entry.  Keys are inserted in the same order as the
    hand-written listing this replaces, so iteration order is unchanged.
    """
    prefix = "first_stage_model.encoder."
    table = {}

    def norm(name, channels):
        # 1-D weight and bias of equal length.
        table[prefix + name + ".weight"] = (channels,)
        table[prefix + name + ".bias"] = (channels,)

    def conv(name, c_out, c_in, k):
        # 4-D weight (out, in, k, k) followed by a bias of length `out`.
        table[prefix + name + ".weight"] = (c_out, c_in, k, k)
        table[prefix + name + ".bias"] = (c_out,)

    def block(name, c_in, c_out):
        # norm1/conv1/norm2/conv2, plus a 1x1 `nin_shortcut` entry only when
        # the channel count changes across the block.
        norm(name + ".norm1", c_in)
        conv(name + ".conv1", c_out, c_in, 3)
        norm(name + ".norm2", c_out)
        conv(name + ".conv2", c_out, c_out, 3)
        if c_in != c_out:
            conv(name + ".nin_shortcut", c_out, c_in, 1)

    def attn(name, channels):
        # One norm followed by four 1x1 entries: q, k, v, proj_out.
        norm(name + ".norm", channels)
        for part in ("q", "k", "v", "proj_out"):
            conv(name + "." + part, channels, channels, 1)

    conv("conv_in", 128, 3, 3)

    # (level index, channels in, channels out, has a `downsample.conv` entry)
    levels = (
        (0, 128, 128, True),
        (1, 128, 256, True),
        (2, 256, 512, True),
        (3, 512, 512, False),
    )
    for level, c_in, c_out, has_downsample in levels:
        block(f"down.{level}.block.0", c_in, c_out)
        block(f"down.{level}.block.1", c_out, c_out)
        if has_downsample:
            conv(f"down.{level}.downsample.conv", c_out, c_out, 3)

    block("mid.block_1", 512, 512)
    attn("mid.attn_1", 512)
    block("mid.block_2", 512, 512)

    norm("norm_out", 512)
    conv("conv_out", 8, 512, 3)
    return table


shapes_encoder = _build_shapes_encoder()
# Keep the module namespace identical to the literal version: only the table remains.
del _build_shapes_encoder

def _build_shapes_decoder():
    """Assemble the name -> shape table for the ``first_stage_model.decoder.*`` entries.

    The table also carries the ``quant_conv`` / ``post_quant_conv`` entries,
    which sit directly under ``first_stage_model.`` and come last.  Keys are
    inserted in the same order as the hand-written listing this replaces, so
    iteration order is unchanged.
    """
    root = "first_stage_model."
    dec = root + "decoder."
    shapes = {}

    def put_norm(key, width):
        # 1-D weight and bias of equal length.
        shapes[key + ".weight"] = (width,)
        shapes[key + ".bias"] = (width,)

    def put_conv(key, n_out, n_in, kernel):
        # 4-D weight (out, in, kernel, kernel) followed by a bias of length `out`.
        shapes[key + ".weight"] = (n_out, n_in, kernel, kernel)
        shapes[key + ".bias"] = (n_out,)

    def put_block(key, n_in, n_out):
        # norm1/conv1/norm2/conv2, plus a 1x1 `nin_shortcut` entry only when
        # the channel count changes across the block.
        put_norm(key + ".norm1", n_in)
        put_conv(key + ".conv1", n_out, n_in, 3)
        put_norm(key + ".norm2", n_out)
        put_conv(key + ".conv2", n_out, n_out, 3)
        if n_in != n_out:
            put_conv(key + ".nin_shortcut", n_out, n_in, 1)

    put_conv(dec + "conv_in", 512, 4, 3)

    put_block(dec + "mid.block_1", 512, 512)
    put_norm(dec + "mid.attn_1.norm", 512)
    for part in ("q", "k", "v", "proj_out"):
        put_conv(dec + "mid.attn_1." + part, 512, 512, 1)
    put_block(dec + "mid.block_2", 512, 512)

    # (level index, channels in, channels out, has an `upsample.conv` entry)
    stages = (
        (0, 256, 128, False),
        (1, 512, 256, True),
        (2, 512, 512, True),
        (3, 512, 512, True),
    )
    for idx, n_in, n_out, has_upsample in stages:
        stage = f"{dec}up.{idx}"
        put_block(stage + ".block.0", n_in, n_out)
        put_block(stage + ".block.1", n_out, n_out)
        put_block(stage + ".block.2", n_out, n_out)
        if has_upsample:
            put_conv(stage + ".upsample.conv", n_out, n_out, 3)

    put_norm(dec + "norm_out", 128)
    put_conv(dec + "conv_out", 3, 128, 3)

    # These two live outside the `decoder.` namespace.
    put_conv(root + "quant_conv", 8, 8, 1)
    put_conv(root + "post_quant_conv", 4, 4, 1)
    return shapes


shapes_decoder = _build_shapes_decoder()
# Keep the module namespace identical to the literal version: only the table remains.
del _build_shapes_decoder

shapes_text_encoder = {
    # "cond_stage_model.transformer.text_model.embeddings.position_ids": (1, 77),
    "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight": (
        49408,
        768,
    ),
    "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight": (
        77,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": (
        768,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight": (
        3072,
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.bias": (3072,),
    "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight": (
        768,
        3072,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias": (768,),
    "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.weight": (
        768,
    ),
    "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.bias": (
        768,
    ),
    "cond_stage_model.transformer.text_model.final_layer_norm.weight": (768,),
    "cond_stage_model.transformer.text_model.final_layer_norm.bias": (768,),
}


shapes_unet_v2 = {'model.diffusion_model.time_embed.0.weight': (1280, 320),
 'model.diffusion_model.time_embed.0.bias': (1280,),
 'model.diffusion_model.time_embed.2.weight': (1280, 1280),
 'model.diffusion_model.time_embed.2.bias': (1280,),
 'model.diffusion_model.input_blocks.0.0.weight': (320, 4, 3, 3),
 'model.diffusion_model.input_blocks.0.0.bias': (320,),
 'model.diffusion_model.input_blocks.1.0.in_layers.0.weight': (320,),
 'model.diffusion_model.input_blocks.1.0.in_layers.0.bias': (320,),
 'model.diffusion_model.input_blocks.1.0.in_layers.2.weight': (320, 320, 3, 3),
 'model.diffusion_model.input_blocks.1.0.in_layers.2.bias': (320,),
 'model.diffusion_model.input_blocks.1.0.emb_layers.1.weight': (320, 1280),
 'model.diffusion_model.input_blocks.1.0.emb_layers.1.bias': (320,),
 'model.diffusion_model.input_blocks.1.0.out_layers.0.weight': (320,),
 'model.diffusion_model.input_blocks.1.0.out_layers.0.bias': (320,),
 'model.diffusion_model.input_blocks.1.0.out_layers.3.weight': (320,
  320,
  3,
  3),
 'model.diffusion_model.input_blocks.1.0.out_layers.3.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.norm.weight': (320,),
 'model.diffusion_model.input_blocks.1.1.norm.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.proj_in.weight': (320, 320, 1, 1),
 'model.diffusion_model.input_blocks.1.1.proj_in.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight': (320,
  320),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight': (320,
  320),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight': (320,
  320),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight': (320,
  320),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight': (2560,
  320),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias': (2560,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight': (320,
  1280),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight': (320,
  320),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight': (320,
  1024),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight': (320,
  1024),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight': (320,
  320),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight': (320,),
 'model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias': (320,),
 'model.diffusion_model.input_blocks.1.1.proj_out.weight': (320, 320, 1, 1),
 'model.diffusion_model.input_blocks.1.1.proj_out.bias': (320,),
 'model.diffusion_model.input_blocks.2.0.in_layers.0.weight': (320,),
 'model.diffusion_model.input_blocks.2.0.in_layers.0.bias': (320,),
 'model.diffusion_model.input_blocks.2.0.in_layers.2.weight': (320, 320, 3, 3),
 'model.diffusion_model.input_blocks.2.0.in_layers.2.bias': (320,),
 'model.diffusion_model.input_blocks.2.0.emb_layers.1.weight': (320, 1280),
 'model.diffusion_model.input_blocks.2.0.emb_layers.1.bias': (320,),
 'model.diffusion_model.input_blocks.2.0.out_layers.0.weight': (320,),
 'model.diffusion_model.input_blocks.2.0.out_layers.0.bias': (320,),
 'model.diffusion_model.input_blocks.2.0.out_layers.3.weight': (320,
  320,
  3,
  3),
 'model.diffusion_model.input_blocks.2.0.out_layers.3.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.norm.weight': (320,),
 'model.diffusion_model.input_blocks.2.1.norm.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.proj_in.weight': (320, 320, 1, 1),
 'model.diffusion_model.input_blocks.2.1.proj_in.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight': (320,
  320),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight': (320,
  320),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight': (320,
  320),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight': (320,
  320),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight': (2560,
  320),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias': (2560,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight': (320,
  1280),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight': (320,
  320),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight': (320,
  1024),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight': (320,
  1024),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight': (320,
  320),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight': (320,),
 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias': (320,),
 'model.diffusion_model.input_blocks.2.1.proj_out.weight': (320, 320, 1, 1),
 'model.diffusion_model.input_blocks.2.1.proj_out.bias': (320,),
 'model.diffusion_model.input_blocks.3.0.op.weight': (320, 320, 3, 3),
 'model.diffusion_model.input_blocks.3.0.op.bias': (320,),
 'model.diffusion_model.input_blocks.4.0.in_layers.0.weight': (320,),
 'model.diffusion_model.input_blocks.4.0.in_layers.0.bias': (320,),
 'model.diffusion_model.input_blocks.4.0.in_layers.2.weight': (640, 320, 3, 3),
 'model.diffusion_model.input_blocks.4.0.in_layers.2.bias': (640,),
 'model.diffusion_model.input_blocks.4.0.emb_layers.1.weight': (640, 1280),
 'model.diffusion_model.input_blocks.4.0.emb_layers.1.bias': (640,),
 'model.diffusion_model.input_blocks.4.0.out_layers.0.weight': (640,),
 'model.diffusion_model.input_blocks.4.0.out_layers.0.bias': (640,),
 'model.diffusion_model.input_blocks.4.0.out_layers.3.weight': (640,
  640,
  3,
  3),
 'model.diffusion_model.input_blocks.4.0.out_layers.3.bias': (640,),
 'model.diffusion_model.input_blocks.4.0.skip_connection.weight': (640,
  320,
  1,
  1),
 'model.diffusion_model.input_blocks.4.0.skip_connection.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.norm.weight': (640,),
 'model.diffusion_model.input_blocks.4.1.norm.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.proj_in.weight': (640, 640, 1, 1),
 'model.diffusion_model.input_blocks.4.1.proj_in.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight': (640,
  640),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight': (640,
  640),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight': (640,
  640),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight': (640,
  640),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight': (5120,
  640),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias': (5120,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight': (640,
  2560),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight': (640,
  640),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight': (640,
  1024),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight': (640,
  1024),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight': (640,
  640),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight': (640,),
 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias': (640,),
 'model.diffusion_model.input_blocks.4.1.proj_out.weight': (640, 640, 1, 1),
 'model.diffusion_model.input_blocks.4.1.proj_out.bias': (640,),
 'model.diffusion_model.input_blocks.5.0.in_layers.0.weight': (640,),
 'model.diffusion_model.input_blocks.5.0.in_layers.0.bias': (640,),
 'model.diffusion_model.input_blocks.5.0.in_layers.2.weight': (640, 640, 3, 3),
 'model.diffusion_model.input_blocks.5.0.in_layers.2.bias': (640,),
 'model.diffusion_model.input_blocks.5.0.emb_layers.1.weight': (640, 1280),
 'model.diffusion_model.input_blocks.5.0.emb_layers.1.bias': (640,),
 'model.diffusion_model.input_blocks.5.0.out_layers.0.weight': (640,),
 'model.diffusion_model.input_blocks.5.0.out_layers.0.bias': (640,),
 'model.diffusion_model.input_blocks.5.0.out_layers.3.weight': (640,
  640,
  3,
  3),
 'model.diffusion_model.input_blocks.5.0.out_layers.3.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.norm.weight': (640,),
 'model.diffusion_model.input_blocks.5.1.norm.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.proj_in.weight': (640, 640, 1, 1),
 'model.diffusion_model.input_blocks.5.1.proj_in.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight': (640,
  640),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight': (640,
  640),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight': (640,
  640),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight': (640,
  640),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight': (5120,
  640),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias': (5120,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight': (640,
  2560),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight': (640,
  640),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight': (640,
  1024),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight': (640,
  1024),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight': (640,
  640),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight': (640,),
 'model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias': (640,),
 'model.diffusion_model.input_blocks.5.1.proj_out.weight': (640, 640, 1, 1),
 'model.diffusion_model.input_blocks.5.1.proj_out.bias': (640,),
 'model.diffusion_model.input_blocks.6.0.op.weight': (640, 640, 3, 3),
 'model.diffusion_model.input_blocks.6.0.op.bias': (640,),
 'model.diffusion_model.input_blocks.7.0.in_layers.0.weight': (640,),
 'model.diffusion_model.input_blocks.7.0.in_layers.0.bias': (640,),
 'model.diffusion_model.input_blocks.7.0.in_layers.2.weight': (1280,
  640,
  3,
  3),
 'model.diffusion_model.input_blocks.7.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.input_blocks.7.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.input_blocks.7.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.input_blocks.7.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.input_blocks.7.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.input_blocks.7.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.input_blocks.7.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.input_blocks.7.0.skip_connection.weight': (1280,
  640,
  1,
  1),
 'model.diffusion_model.input_blocks.7.0.skip_connection.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.norm.weight': (1280,),
 'model.diffusion_model.input_blocks.7.1.norm.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.proj_in.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.input_blocks.7.1.proj_in.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight': (10240,
  1280),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias': (10240,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight': (1280,
  5120),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight': (1280,
  1024),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight': (1280,
  1024),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight': (1280,),
 'model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias': (1280,),
 'model.diffusion_model.input_blocks.7.1.proj_out.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.input_blocks.7.1.proj_out.bias': (1280,),
 'model.diffusion_model.input_blocks.8.0.in_layers.0.weight': (1280,),
 'model.diffusion_model.input_blocks.8.0.in_layers.0.bias': (1280,),
 'model.diffusion_model.input_blocks.8.0.in_layers.2.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.input_blocks.8.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.input_blocks.8.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.input_blocks.8.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.input_blocks.8.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.input_blocks.8.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.input_blocks.8.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.input_blocks.8.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.norm.weight': (1280,),
 'model.diffusion_model.input_blocks.8.1.norm.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.proj_in.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.input_blocks.8.1.proj_in.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight': (10240,
  1280),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias': (10240,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight': (1280,
  5120),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight': (1280,
  1024),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight': (1280,
  1024),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight': (1280,),
 'model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias': (1280,),
 'model.diffusion_model.input_blocks.8.1.proj_out.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.input_blocks.8.1.proj_out.bias': (1280,),
 'model.diffusion_model.input_blocks.9.0.op.weight': (1280, 1280, 3, 3),
 'model.diffusion_model.input_blocks.9.0.op.bias': (1280,),
 'model.diffusion_model.input_blocks.10.0.in_layers.0.weight': (1280,),
 'model.diffusion_model.input_blocks.10.0.in_layers.0.bias': (1280,),
 'model.diffusion_model.input_blocks.10.0.in_layers.2.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.input_blocks.10.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.input_blocks.10.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.input_blocks.10.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.input_blocks.10.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.input_blocks.10.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.input_blocks.10.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.input_blocks.10.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.input_blocks.11.0.in_layers.0.weight': (1280,),
 'model.diffusion_model.input_blocks.11.0.in_layers.0.bias': (1280,),
 'model.diffusion_model.input_blocks.11.0.in_layers.2.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.input_blocks.11.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.input_blocks.11.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.input_blocks.11.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.input_blocks.11.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.input_blocks.11.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.input_blocks.11.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.input_blocks.11.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.middle_block.0.in_layers.0.weight': (1280,),
 'model.diffusion_model.middle_block.0.in_layers.0.bias': (1280,),
 'model.diffusion_model.middle_block.0.in_layers.2.weight': (1280, 1280, 3, 3),
 'model.diffusion_model.middle_block.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.middle_block.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.middle_block.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.middle_block.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.middle_block.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.middle_block.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.middle_block.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.middle_block.1.norm.weight': (1280,),
 'model.diffusion_model.middle_block.1.norm.bias': (1280,),
 'model.diffusion_model.middle_block.1.proj_in.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.middle_block.1.proj_in.bias': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight': (1280,
  1280),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight': (1280,
  1280),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight': (1280,
  1280),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight': (10240,
  1280),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias': (10240,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight': (1280,
  5120),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight': (1280,
  1280),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight': (1280,
  1024),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight': (1280,
  1024),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight': (1280,),
 'model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias': (1280,),
 'model.diffusion_model.middle_block.1.proj_out.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.middle_block.1.proj_out.bias': (1280,),
 'model.diffusion_model.middle_block.2.in_layers.0.weight': (1280,),
 'model.diffusion_model.middle_block.2.in_layers.0.bias': (1280,),
 'model.diffusion_model.middle_block.2.in_layers.2.weight': (1280, 1280, 3, 3),
 'model.diffusion_model.middle_block.2.in_layers.2.bias': (1280,),
 'model.diffusion_model.middle_block.2.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.middle_block.2.emb_layers.1.bias': (1280,),
 'model.diffusion_model.middle_block.2.out_layers.0.weight': (1280,),
 'model.diffusion_model.middle_block.2.out_layers.0.bias': (1280,),
 'model.diffusion_model.middle_block.2.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.middle_block.2.out_layers.3.bias': (1280,),
 'model.diffusion_model.output_blocks.0.0.in_layers.0.weight': (2560,),
 'model.diffusion_model.output_blocks.0.0.in_layers.0.bias': (2560,),
 'model.diffusion_model.output_blocks.0.0.in_layers.2.weight': (1280,
  2560,
  3,
  3),
 'model.diffusion_model.output_blocks.0.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.output_blocks.0.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.output_blocks.0.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.output_blocks.0.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.output_blocks.0.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.output_blocks.0.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.output_blocks.0.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.output_blocks.0.0.skip_connection.weight': (1280,
  2560,
  1,
  1),
 'model.diffusion_model.output_blocks.0.0.skip_connection.bias': (1280,),
 'model.diffusion_model.output_blocks.1.0.in_layers.0.weight': (2560,),
 'model.diffusion_model.output_blocks.1.0.in_layers.0.bias': (2560,),
 'model.diffusion_model.output_blocks.1.0.in_layers.2.weight': (1280,
  2560,
  3,
  3),
 'model.diffusion_model.output_blocks.1.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.output_blocks.1.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.output_blocks.1.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.output_blocks.1.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.output_blocks.1.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.output_blocks.1.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.output_blocks.1.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.output_blocks.1.0.skip_connection.weight': (1280,
  2560,
  1,
  1),
 'model.diffusion_model.output_blocks.1.0.skip_connection.bias': (1280,),
 'model.diffusion_model.output_blocks.2.0.in_layers.0.weight': (2560,),
 'model.diffusion_model.output_blocks.2.0.in_layers.0.bias': (2560,),
 'model.diffusion_model.output_blocks.2.0.in_layers.2.weight': (1280,
  2560,
  3,
  3),
 'model.diffusion_model.output_blocks.2.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.output_blocks.2.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.output_blocks.2.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.output_blocks.2.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.output_blocks.2.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.output_blocks.2.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.output_blocks.2.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.output_blocks.2.0.skip_connection.weight': (1280,
  2560,
  1,
  1),
 'model.diffusion_model.output_blocks.2.0.skip_connection.bias': (1280,),
 'model.diffusion_model.output_blocks.2.1.conv.weight': (1280, 1280, 3, 3),
 'model.diffusion_model.output_blocks.2.1.conv.bias': (1280,),
 'model.diffusion_model.output_blocks.3.0.in_layers.0.weight': (2560,),
 'model.diffusion_model.output_blocks.3.0.in_layers.0.bias': (2560,),
 'model.diffusion_model.output_blocks.3.0.in_layers.2.weight': (1280,
  2560,
  3,
  3),
 'model.diffusion_model.output_blocks.3.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.output_blocks.3.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.output_blocks.3.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.output_blocks.3.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.output_blocks.3.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.output_blocks.3.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.output_blocks.3.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.output_blocks.3.0.skip_connection.weight': (1280,
  2560,
  1,
  1),
 'model.diffusion_model.output_blocks.3.0.skip_connection.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.norm.weight': (1280,),
 'model.diffusion_model.output_blocks.3.1.norm.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.proj_in.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.output_blocks.3.1.proj_in.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight': (10240,
  1280),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias': (10240,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight': (1280,
  5120),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight': (1280,
  1024),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight': (1280,
  1024),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight': (1280,),
 'model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias': (1280,),
 'model.diffusion_model.output_blocks.3.1.proj_out.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.output_blocks.3.1.proj_out.bias': (1280,),
 'model.diffusion_model.output_blocks.4.0.in_layers.0.weight': (2560,),
 'model.diffusion_model.output_blocks.4.0.in_layers.0.bias': (2560,),
 'model.diffusion_model.output_blocks.4.0.in_layers.2.weight': (1280,
  2560,
  3,
  3),
 'model.diffusion_model.output_blocks.4.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.output_blocks.4.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.output_blocks.4.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.output_blocks.4.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.output_blocks.4.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.output_blocks.4.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.output_blocks.4.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.output_blocks.4.0.skip_connection.weight': (1280,
  2560,
  1,
  1),
 'model.diffusion_model.output_blocks.4.0.skip_connection.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.norm.weight': (1280,),
 'model.diffusion_model.output_blocks.4.1.norm.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.proj_in.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.output_blocks.4.1.proj_in.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight': (10240,
  1280),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias': (10240,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight': (1280,
  5120),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight': (1280,
  1024),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight': (1280,
  1024),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight': (1280,),
 'model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias': (1280,),
 'model.diffusion_model.output_blocks.4.1.proj_out.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.output_blocks.4.1.proj_out.bias': (1280,),
 'model.diffusion_model.output_blocks.5.0.in_layers.0.weight': (1920,),
 'model.diffusion_model.output_blocks.5.0.in_layers.0.bias': (1920,),
 'model.diffusion_model.output_blocks.5.0.in_layers.2.weight': (1280,
  1920,
  3,
  3),
 'model.diffusion_model.output_blocks.5.0.in_layers.2.bias': (1280,),
 'model.diffusion_model.output_blocks.5.0.emb_layers.1.weight': (1280, 1280),
 'model.diffusion_model.output_blocks.5.0.emb_layers.1.bias': (1280,),
 'model.diffusion_model.output_blocks.5.0.out_layers.0.weight': (1280,),
 'model.diffusion_model.output_blocks.5.0.out_layers.0.bias': (1280,),
 'model.diffusion_model.output_blocks.5.0.out_layers.3.weight': (1280,
  1280,
  3,
  3),
 'model.diffusion_model.output_blocks.5.0.out_layers.3.bias': (1280,),
 'model.diffusion_model.output_blocks.5.0.skip_connection.weight': (1280,
  1920,
  1,
  1),
 'model.diffusion_model.output_blocks.5.0.skip_connection.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.norm.weight': (1280,),
 'model.diffusion_model.output_blocks.5.1.norm.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.proj_in.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.output_blocks.5.1.proj_in.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight': (10240,
  1280),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias': (10240,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight': (1280,
  5120),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight': (1280,
  1024),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight': (1280,
  1024),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight': (1280,
  1280),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight': (1280,),
 'model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias': (1280,),
 'model.diffusion_model.output_blocks.5.1.proj_out.weight': (1280, 1280, 1, 1),
 'model.diffusion_model.output_blocks.5.1.proj_out.bias': (1280,),
 'model.diffusion_model.output_blocks.5.2.conv.weight': (1280, 1280, 3, 3),
 'model.diffusion_model.output_blocks.5.2.conv.bias': (1280,),
 'model.diffusion_model.output_blocks.6.0.in_layers.0.weight': (1920,),
 'model.diffusion_model.output_blocks.6.0.in_layers.0.bias': (1920,),
 'model.diffusion_model.output_blocks.6.0.in_layers.2.weight': (640,
  1920,
  3,
  3),
 'model.diffusion_model.output_blocks.6.0.in_layers.2.bias': (640,),
 'model.diffusion_model.output_blocks.6.0.emb_layers.1.weight': (640, 1280),
 'model.diffusion_model.output_blocks.6.0.emb_layers.1.bias': (640,),
 'model.diffusion_model.output_blocks.6.0.out_layers.0.weight': (640,),
 'model.diffusion_model.output_blocks.6.0.out_layers.0.bias': (640,),
 'model.diffusion_model.output_blocks.6.0.out_layers.3.weight': (640,
  640,
  3,
  3),
 'model.diffusion_model.output_blocks.6.0.out_layers.3.bias': (640,),
 'model.diffusion_model.output_blocks.6.0.skip_connection.weight': (640,
  1920,
  1,
  1),
 'model.diffusion_model.output_blocks.6.0.skip_connection.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.norm.weight': (640,),
 'model.diffusion_model.output_blocks.6.1.norm.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.proj_in.weight': (640, 640, 1, 1),
 'model.diffusion_model.output_blocks.6.1.proj_in.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight': (640,
  640),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight': (640,
  640),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight': (640,
  640),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight': (640,
  640),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight': (5120,
  640),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias': (5120,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight': (640,
  2560),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight': (640,
  640),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight': (640,
  1024),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight': (640,
  1024),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight': (640,
  640),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight': (640,),
 'model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias': (640,),
 'model.diffusion_model.output_blocks.6.1.proj_out.weight': (640, 640, 1, 1),
 'model.diffusion_model.output_blocks.6.1.proj_out.bias': (640,),
 'model.diffusion_model.output_blocks.7.0.in_layers.0.weight': (1280,),
 'model.diffusion_model.output_blocks.7.0.in_layers.0.bias': (1280,),
 'model.diffusion_model.output_blocks.7.0.in_layers.2.weight': (640,
  1280,
  3,
  3),
 'model.diffusion_model.output_blocks.7.0.in_layers.2.bias': (640,),
 'model.diffusion_model.output_blocks.7.0.emb_layers.1.weight': (640, 1280),
 'model.diffusion_model.output_blocks.7.0.emb_layers.1.bias': (640,),
 'model.diffusion_model.output_blocks.7.0.out_layers.0.weight': (640,),
 'model.diffusion_model.output_blocks.7.0.out_layers.0.bias': (640,),
 'model.diffusion_model.output_blocks.7.0.out_layers.3.weight': (640,
  640,
  3,
  3),
 'model.diffusion_model.output_blocks.7.0.out_layers.3.bias': (640,),
 'model.diffusion_model.output_blocks.7.0.skip_connection.weight': (640,
  1280,
  1,
  1),
 'model.diffusion_model.output_blocks.7.0.skip_connection.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.norm.weight': (640,),
 'model.diffusion_model.output_blocks.7.1.norm.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.proj_in.weight': (640, 640, 1, 1),
 'model.diffusion_model.output_blocks.7.1.proj_in.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight': (640,
  640),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight': (640,
  640),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight': (640,
  640),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight': (640,
  640),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight': (5120,
  640),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias': (5120,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight': (640,
  2560),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight': (640,
  640),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight': (640,
  1024),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight': (640,
  1024),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight': (640,
  640),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight': (640,),
 'model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias': (640,),
 'model.diffusion_model.output_blocks.7.1.proj_out.weight': (640, 640, 1, 1),
 'model.diffusion_model.output_blocks.7.1.proj_out.bias': (640,),
 'model.diffusion_model.output_blocks.8.0.in_layers.0.weight': (960,),
 'model.diffusion_model.output_blocks.8.0.in_layers.0.bias': (960,),
 'model.diffusion_model.output_blocks.8.0.in_layers.2.weight': (640,
  960,
  3,
  3),
 'model.diffusion_model.output_blocks.8.0.in_layers.2.bias': (640,),
 'model.diffusion_model.output_blocks.8.0.emb_layers.1.weight': (640, 1280),
 'model.diffusion_model.output_blocks.8.0.emb_layers.1.bias': (640,),
 'model.diffusion_model.output_blocks.8.0.out_layers.0.weight': (640,),
 'model.diffusion_model.output_blocks.8.0.out_layers.0.bias': (640,),
 'model.diffusion_model.output_blocks.8.0.out_layers.3.weight': (640,
  640,
  3,
  3),
 'model.diffusion_model.output_blocks.8.0.out_layers.3.bias': (640,),
 'model.diffusion_model.output_blocks.8.0.skip_connection.weight': (640,
  960,
  1,
  1),
 'model.diffusion_model.output_blocks.8.0.skip_connection.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.norm.weight': (640,),
 'model.diffusion_model.output_blocks.8.1.norm.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.proj_in.weight': (640, 640, 1, 1),
 'model.diffusion_model.output_blocks.8.1.proj_in.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight': (640,
  640),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight': (640,
  640),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight': (640,
  640),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight': (640,
  640),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight': (5120,
  640),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias': (5120,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight': (640,
  2560),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight': (640,
  640),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight': (640,
  1024),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight': (640,
  1024),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight': (640,
  640),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight': (640,),
 'model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias': (640,),
 'model.diffusion_model.output_blocks.8.1.proj_out.weight': (640, 640, 1, 1),
 'model.diffusion_model.output_blocks.8.1.proj_out.bias': (640,),
 'model.diffusion_model.output_blocks.8.2.conv.weight': (640, 640, 3, 3),
 'model.diffusion_model.output_blocks.8.2.conv.bias': (640,),
 'model.diffusion_model.output_blocks.9.0.in_layers.0.weight': (960,),
 'model.diffusion_model.output_blocks.9.0.in_layers.0.bias': (960,),
 'model.diffusion_model.output_blocks.9.0.in_layers.2.weight': (320,
  960,
  3,
  3),
 'model.diffusion_model.output_blocks.9.0.in_layers.2.bias': (320,),
 'model.diffusion_model.output_blocks.9.0.emb_layers.1.weight': (320, 1280),
 'model.diffusion_model.output_blocks.9.0.emb_layers.1.bias': (320,),
 'model.diffusion_model.output_blocks.9.0.out_layers.0.weight': (320,),
 'model.diffusion_model.output_blocks.9.0.out_layers.0.bias': (320,),
 'model.diffusion_model.output_blocks.9.0.out_layers.3.weight': (320,
  320,
  3,
  3),
 'model.diffusion_model.output_blocks.9.0.out_layers.3.bias': (320,),
 'model.diffusion_model.output_blocks.9.0.skip_connection.weight': (320,
  960,
  1,
  1),
 'model.diffusion_model.output_blocks.9.0.skip_connection.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.norm.weight': (320,),
 'model.diffusion_model.output_blocks.9.1.norm.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.proj_in.weight': (320, 320, 1, 1),
 'model.diffusion_model.output_blocks.9.1.proj_in.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight': (320,
  320),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight': (320,
  320),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight': (320,
  320),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight': (320,
  320),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight': (2560,
  320),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias': (2560,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight': (320,
  1280),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight': (320,
  320),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight': (320,
  1024),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight': (320,
  1024),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight': (320,
  320),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight': (320,),
 'model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias': (320,),
 'model.diffusion_model.output_blocks.9.1.proj_out.weight': (320, 320, 1, 1),
 'model.diffusion_model.output_blocks.9.1.proj_out.bias': (320,),
 'model.diffusion_model.output_blocks.10.0.in_layers.0.weight': (640,),
 'model.diffusion_model.output_blocks.10.0.in_layers.0.bias': (640,),
 'model.diffusion_model.output_blocks.10.0.in_layers.2.weight': (320,
  640,
  3,
  3),
 'model.diffusion_model.output_blocks.10.0.in_layers.2.bias': (320,),
 'model.diffusion_model.output_blocks.10.0.emb_layers.1.weight': (320, 1280),
 'model.diffusion_model.output_blocks.10.0.emb_layers.1.bias': (320,),
 'model.diffusion_model.output_blocks.10.0.out_layers.0.weight': (320,),
 'model.diffusion_model.output_blocks.10.0.out_layers.0.bias': (320,),
 'model.diffusion_model.output_blocks.10.0.out_layers.3.weight': (320,
  320,
  3,
  3),
 'model.diffusion_model.output_blocks.10.0.out_layers.3.bias': (320,),
 'model.diffusion_model.output_blocks.10.0.skip_connection.weight': (320,
  640,
  1,
  1),
 'model.diffusion_model.output_blocks.10.0.skip_connection.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.norm.weight': (320,),
 'model.diffusion_model.output_blocks.10.1.norm.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.proj_in.weight': (320, 320, 1, 1),
 'model.diffusion_model.output_blocks.10.1.proj_in.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight': (320,
  320),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight': (320,
  320),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight': (320,
  320),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight': (320,
  320),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight': (2560,
  320),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias': (2560,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight': (320,
  1280),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight': (320,
  320),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight': (320,
  1024),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight': (320,
  1024),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight': (320,
  320),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight': (320,),
 'model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias': (320,),
 'model.diffusion_model.output_blocks.10.1.proj_out.weight': (320, 320, 1, 1),
 'model.diffusion_model.output_blocks.10.1.proj_out.bias': (320,),
 'model.diffusion_model.output_blocks.11.0.in_layers.0.weight': (640,),
 'model.diffusion_model.output_blocks.11.0.in_layers.0.bias': (640,),
 'model.diffusion_model.output_blocks.11.0.in_layers.2.weight': (320,
  640,
  3,
  3),
 'model.diffusion_model.output_blocks.11.0.in_layers.2.bias': (320,),
 'model.diffusion_model.output_blocks.11.0.emb_layers.1.weight': (320, 1280),
 'model.diffusion_model.output_blocks.11.0.emb_layers.1.bias': (320,),
 'model.diffusion_model.output_blocks.11.0.out_layers.0.weight': (320,),
 'model.diffusion_model.output_blocks.11.0.out_layers.0.bias': (320,),
 'model.diffusion_model.output_blocks.11.0.out_layers.3.weight': (320,
  320,
  3,
  3),
 'model.diffusion_model.output_blocks.11.0.out_layers.3.bias': (320,),
 'model.diffusion_model.output_blocks.11.0.skip_connection.weight': (320,
  640,
  1,
  1),
 'model.diffusion_model.output_blocks.11.0.skip_connection.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.norm.weight': (320,),
 'model.diffusion_model.output_blocks.11.1.norm.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.proj_in.weight': (320, 320, 1, 1),
 'model.diffusion_model.output_blocks.11.1.proj_in.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight': (320,
  320),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight': (320,
  320),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight': (320,
  320),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight': (320,
  320),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight': (2560,
  320),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias': (2560,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight': (320,
  1280),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight': (320,
  320),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight': (320,
  1024),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight': (320,
  1024),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight': (320,
  320),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight': (320,),
 'model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias': (320,),
 'model.diffusion_model.output_blocks.11.1.proj_out.weight': (320, 320, 1, 1),
 'model.diffusion_model.output_blocks.11.1.proj_out.bias': (320,),
 'model.diffusion_model.out.0.weight': (320,),
 'model.diffusion_model.out.0.bias': (320,),
 'model.diffusion_model.out.2.weight': (4, 320, 3, 3),
 'model.diffusion_model.out.2.bias': (4,)}



# Entries expected for the OpenCLIP text encoder: parameter name -> tuple of
# ints (presumably the tensor shape, by analogy with the `shapes_*` tables
# earlier in this file -- confirm against the consumer).
#
# Insertion order is: positional embedding, transformer residual blocks 0..22
# (twelve entries each), token embedding, final layer norm.
text_encoder_open_clip = {
    'cond_stage_model.model.positional_embedding': (77, 1024),
    # Residual blocks 0-22 all carry the same twelve entries, so they are
    # generated rather than spelled out one by one.
    # NOTE(review): block 23 is intentionally not generated -- its entries were
    # disabled (commented out) in the hand-written form of this table.
    # Presumably the consumer does not use the last block; confirm before
    # raising the range to 24.
    **{
        f'cond_stage_model.model.transformer.resblocks.{block}.{leaf}': dims
        for block in range(23)
        for leaf, dims in (
            ('ln_1.weight', (1024,)),
            ('ln_1.bias', (1024,)),
            ('attn.in_proj_weight', (3072, 1024)),
            ('attn.in_proj_bias', (3072,)),
            ('attn.out_proj.weight', (1024, 1024)),
            ('attn.out_proj.bias', (1024,)),
            ('ln_2.weight', (1024,)),
            ('ln_2.bias', (1024,)),
            ('mlp.c_fc.weight', (4096, 1024)),
            ('mlp.c_fc.bias', (4096,)),
            ('mlp.c_proj.weight', (1024, 4096)),
            ('mlp.c_proj.bias', (1024,)),
        )
    },
    'cond_stage_model.model.token_embedding.weight': (49408, 1024),
    'cond_stage_model.model.ln_final.weight': (1024,),
    'cond_stage_model.model.ln_final.bias': (1024,),
}

